rm(list=ls(all=TRUE))

library(mc2d)

###load data: Y enters first, followed by X variables###

ds <- read.delim("Costa Rica Survey Final.txt",header=TRUE)	# Read into memory a tab delimited file and assign it to a data frame ds
attach(ds)    						# make variables available by name 

dat <- ds[c("P77","P107")]
dat <- dat[P104==0,]
dat<-as.data.frame(na.omit(dat))
n<-nrow(dat)

## prob. of 1 for randomizing device or group status indicator ##

p <- 0.2635659

n.direct <- sum(as.numeric(dat[,2]==1)+ as.numeric(dat[,2]==2))
mu.direct    <- sum(as.numeric(dat[,2]==1))/n.direct
se.direct    <- sqrt(mu.direct*(1-mu.direct)/n.direct)
mu.direct.CI <- c(mu.direct-1.96*se.direct,mu.direct+1.96*se.direct)

n.sst <- sum(as.numeric(dat[,1]==1)+ as.numeric(dat[,1]==2))
y.bar  <- sum(as.numeric(dat[,1]==1))/n.sst
mu.sst <- (y.bar+p-1)/(2*p-1)
se.sst <- sqrt( mu.sst*(1-mu.sst)/n.sst + (p*(1-p))/(n.sst*(2*p-1)^2))
mu.sst.CI <- c(mu.sst-1.96*se.sst,mu.sst+1.96*se.sst)


Y <- 1*as.numeric(dat[,1]==2&dat[,2]==2) + 2*as.numeric(dat[,1]==1&dat[,2]==2) + 3*as.numeric(dat[,1]==2&dat[,2]==1) + 4*as.numeric(dat[,1]==1&dat[,2]==1) + 5*as.numeric(dat[,1]==2&dat[,2]==3|dat[,1]==2&dat[,2]==99) + 6*as.numeric(dat[,1]==1&dat[,2]==3|dat[,1]==1&dat[,2]==99)
Y <- Y[Y!=0]

tab.Y<-table(Y)

n1  <- tab.Y[1]
n2  <- tab.Y[2]
n3  <- tab.Y[3]
n4  <- tab.Y[4]
n5  <- tab.Y[5]
n6  <- tab.Y[6]

n <- n1+n2+n3+n4+n5+n6
ncat<-c(n1,n2,n3,n4,n5,n6)


## Set up function to calculate parameter values according to E-M algorithm ##

EM <- function(data,par) { 

mu    <-par[1]
lamT1 <-par[2]
lamL1 <-par[3]
lamT0 <-par[4]

n1.0  <- data[1]*((p*lamT0*(1-mu))/(p*lamT0*(1-mu) + (1-p)*lamL1*mu))
n1.1  <- data[1] - n1.0
n2.0  <- data[2]*(((1-p)*lamT0*(1-mu))/((1-p)*lamT0*(1-mu) + p*lamL1*mu))
n2.1  <- data[2] - n2.0
n5.0 <- data[5]*((p*(1-lamT0)*(1-mu))/(p*(1-lamT0)*(1-mu) + (1-p)*(1-lamT1-lamL1)*mu)) 
n5.1 <- data[5] - n5.0
n6.0 <- data[6]*(((1-p)*(1-lamT0)*(1-mu))/((1-p)*(1-lamT0)*(1-mu) + p*(1-lamT1-lamL1)*mu))
n6.1 <- data[6] - n6.0

n.A <- data[3] + data[4]
n.B <- n1.0 + n2.0
n.C <- n5.0 + n6.0
n.D <- n1.1 + n2.1
n.E <- n5.1 + n6.1

mu.j    <- (n.A + n.D + n.E)/(n.A + n.D + n.E + n.B + n.C)
lamT1.j <- n.A/(n.A + n.D + n.E)
lamL1.j <- n.D/(n.A + n.D + n.E)
lamT0.j <- n.B/(n.B + n.C)

est.j <- c(mu.j,lamT1.j,lamL1.j,lamT0.j)
est.j}

tol <- .Machine$double.eps # tolerance for stopping algorithm
M<-3000

init<-c(.2,.2,.2,.2) # initial parameter estimates
pars.old <- EM(ncat,init)

for (j in 1:M){

pars.new <- EM(ncat,pars.old)

abs.diff <- abs(pars.new-pars.old)

if (max(abs.diff) < tol ) break

pars.old <- pars.new}

### parameter estimates and number of iterations necessary for convergence
EM.est<-pars.new
iter<-j


###Bootstrap Uncertainty Estimates###

n.boot <- 1000 					#number of bootstrap samples#
pars.boot <- matrix(NA,nrow=n.boot,ncol=length(pars.new))
n.sim <- n    			## sample size for each bootstrap sample ##

##Set up fitted multinomial density##

mu.mle    <-pars.new[1] 
lamT1.mle <-pars.new[2] 
lamL1.mle <-pars.new[3] 
lamT0.mle <-pars.new[4]

gam1 <- p*lamT0.mle*(1-mu.mle) + (1-p)*lamL1.mle*mu.mle
gam2 <- (1-p)*lamT0.mle*(1-mu.mle) + p*lamL1.mle*mu.mle
gam3 <- (1-p)*lamT1.mle*mu.mle
gam4 <- p*lamT1.mle*mu.mle
gam5 <- p*(1-lamT0.mle)*(1-mu.mle) + (1-p)*(1-lamT1.mle-lamL1.mle)*mu.mle
gam6 <- (1-p)*(1-lamT0.mle)*(1-mu.mle) + p*(1-lamT1.mle-lamL1.mle)*mu.mle
Mprob <- cbind(gam1,gam2,gam3,gam4,gam5,gam6) 

##Draw bootstrap samples from density and estimate parameter vector for each sample##

for (k in 1:n.boot) { 

Y.boot <- rmultinomial(n=n.sim,size=1,Mprob)

n1.boot <- sum(Y.boot[,1])
n2.boot <- sum(Y.boot[,2])
n3.boot <- sum(Y.boot[,3])
n4.boot <- sum(Y.boot[,4])
n5.boot <- sum(Y.boot[,5])
n6.boot <- sum(Y.boot[,6])

ncat.boot <- c(n1.boot,n2.boot,n3.boot,n4.boot,n5.boot,n6.boot)

pars.oldB <- EM(ncat.boot,init)

for (j in 1:M){

pars.newB <- EM(ncat.boot,pars.oldB)
abs.diffB <- abs(pars.newB-pars.oldB)

if (max(abs.diffB) < tol ) break
pars.oldB <- pars.newB
}

pars.boot[k,]<-pars.newB
}


V       <- cov(pars.boot)
SEs     <- sqrt(diag(V))
mu.CI   <- quantile(pars.boot[,1],c(.025,.975))
lamT1.CI<- quantile(pars.boot[,2],c(.025,.975))
lamL1.CI<- quantile(pars.boot[,3],c(.025,.975))
lamT0.CI<- quantile(pars.boot[,4],c(.025,.975))
conf.int<- rbind(mu.CI,lamT1.CI,lamL1.CI,lamT0.CI)

out1<-cbind(EM.est,SEs,conf.int)
colnames(out1)<-c("estimate","SE","2.5%","97.5%")
rownames(out1)<-c("mu","lamT1","lamL1","lamT0")
print(list(Joint.Model.summary=out1,iterations=iter,tol=tol,N.obs=n))

out2<-c(mu.direct,se.direct,mu.direct.CI)
names(out2)<-c("estimate","SE","2.5%","97.5%")
print(list(DQ.Model.summary=out2))

out3<-c(mu.sst,se.sst,mu.sst.CI)
names(out3)<-c("estimate","SE","2.5%","97.5%")
print(list(SST.Model.summary=out3)) 



